import requests
from bs4 import BeautifulSoup
import csv


def extract_news_data(soup):
    """Extract news titles, summaries, links, and timestamps from a parsed page.

    Scans for ``<li>`` elements with ids ``newsTr0`` .. ``newsTr249`` (the
    layout used by the eastmoney listing page) and pulls the fields out of
    each one, substituting placeholder strings when a field is missing.

    Args:
        soup: BeautifulSoup document for the listing page.

    Returns:
        Tuple of four parallel lists: (titles, infos, links, times).
    """
    titles = []
    infos = []
    links = []
    times = []

    # The page numbers its entries newsTr0..newsTr249; probe each id.
    for i in range(0, 250):
        news_item = soup.find('li', id=f"newsTr{i}")
        if not news_item:
            continue

        # Title and link live in <p class="title"><a href=...>.
        # (Look the <p> up once instead of twice as the original did.)
        title_p = news_item.find('p', class_='title')
        title_elem = title_p.find('a', href=True) if title_p else None
        if title_elem:
            title = title_elem.get_text(strip=True)
            link = title_elem['href']
        else:
            title = "无标题"
            link = "无链接"

        # The summary text is carried in the title attribute of <p class="info">.
        info_elem = news_item.find('p', class_='info')
        info = info_elem.get('title', '').strip() if info_elem else "无信息"

        # BUG FIX: the original searched for a non-existent <n> tag, so the
        # timestamp was never found. Match any element with class "time".
        time_elem = news_item.find(class_='time')
        time_text = time_elem.get_text(strip=True) if time_elem else "无时间"

        titles.append(title)
        infos.append(info)
        links.append(link)
        times.append(time_text)

    return titles, infos, links, times


def get_news_nums(data_list):
    """Print and return how many hot-news entries *data_list* holds."""
    count = len(data_list)
    print(f"总共有{count}条热点新闻")
    return count


def save_csv(titles, infos, links, times, filename='eastmoney_data.csv'):
    """Write the scraped news columns to a CSV file.

    One row is written per title; if a companion list is shorter it is padded
    with empty strings so a ragged input cannot raise IndexError.

    Args:
        titles: list of headline strings (drives the row count).
        infos: list of summary strings.
        links: list of URL strings.
        times: list of publication-time strings.
        filename: output path; utf-8-sig adds a BOM so Excel opens the
            Chinese text correctly.
    """
    with open(filename, 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.writer(csvfile)
        # Header row
        writer.writerow(['标题', '信息内容', '链接地址', '发布时间'])

        # Data rows — pad missing companion fields with ''.
        for i, title in enumerate(titles):
            info = infos[i] if i < len(infos) else ''
            link = links[i] if i < len(links) else ''
            time_text = times[i] if i < len(times) else ''
            writer.writerow([title, info, link, time_text])

    # BUG FIX: the original printed the literal text "(unknown)" instead of
    # interpolating the output filename.
    print(f"数据已保存到 {filename}")


def main():
    """Fetch the eastmoney hot-news listing page, extract articles, save CSV.

    Best-effort entry point: request/parse errors are caught and reported on
    stdout rather than propagated.
    """
    URL = "https://finance.eastmoney.com/a/cywjh_1.html"

    try:
        # A browser-like User-Agent avoids trivial bot blocking.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # BUG FIX: add a timeout so a stalled server cannot hang the script
        # forever (requests has no default timeout).
        response = requests.get(URL, headers=headers, timeout=10)
        print(f"请求状态码: {response.status_code}")
        response.encoding = "UTF-8"

        # Parse the HTML (removed leftover debug code that dumped every
        # <a> tag on the page to stdout).
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract the news data.
        titles, infos, links, times = extract_news_data(soup)

        # Report how many entries were found.
        get_news_nums(titles)

        # Print up to the first five entries as a sample.
        print("\n前5条新闻示例:")
        for i in range(min(5, len(titles))):
            print(f"{i + 1}. 标题: {titles[i]}")
            print(f"   信息: {infos[i][:80]}...")
            print(f"   链接: {links[i]}")
            print(f"   时间: {times[i]}")
            print("-" * 80)

        # Persist everything to CSV.
        save_csv(titles, infos, links, times)

    except requests.RequestException as e:
        print(f"请求出错: {e}")
    except Exception as e:
        # Broad top-level boundary: report anything unexpected instead of
        # crashing the script.
        print(f"处理过程中出错: {e}")


# Run the scraper when executed as a script.
if __name__ == "__main__":
    main()
